import scrapy
from novel.items import ChapterItem


class BiqugeSpider(scrapy.Spider):
    """Crawl hot novels from www.biquge.info and yield one ChapterItem per chapter.

    Flow: front page -> per-novel chapter list -> per-chapter content page.
    Each request carries the novel's save directory through ``meta`` so the
    pipeline knows where to write the chapter file.
    """

    name = 'biquge'
    allowed_domains = ['www.biquge.info']
    start_urls = ['http://www.biquge.info/']

    def parse(self, response):
        """Parse the front page: find hot novels, follow each novel's page.

        Bug fix: the original collected all URLs first and kept a single
        ``book_dir`` variable that was overwritten on every iteration, so
        every novel's chapters ended up under the LAST novel's directory.
        Yielding inside the loop pairs each URL with its own directory.
        """
        save_dir = './data'
        novels = response.xpath('//div[@id="hotcontent"]//div[@class="item"]')
        for novel in novels:
            novel_name = novel.xpath('.//dt/a/text()').get()
            novel_url = novel.xpath('.//dt/a/@href').get()
            if not novel_url:
                # Skip malformed entries instead of yielding a Request(url=None).
                continue
            book_dir = save_dir + '/' + str(novel_name)
            yield scrapy.Request(
                url=response.urljoin(novel_url),
                callback=self.parse_chapters,
                meta={'meta_url': novel_url, 'meta_save': book_dir},
            )

    def parse_chapters(self, response):
        """Parse a novel's chapter index and follow every chapter link."""
        chapter_save = response.meta['meta_save']
        chapters = response.xpath('//div[@id="list"]/dl/dd/a')
        for chapter in chapters:
            content_url = chapter.xpath('./@href').get()
            if not content_url:
                continue
            # response.urljoin resolves both relative and absolute hrefs
            # correctly, unlike the original naive string concatenation
            # (which broke when the base URL lacked a trailing slash).
            yield scrapy.Request(
                url=response.urljoin(content_url),
                callback=self.parse_content,
                meta={'meta_save': chapter_save},
            )

    def parse_content(self, response):
        """Parse a chapter page into a ChapterItem (name, text, target filename)."""
        chapter_save = response.meta['meta_save']

        item = ChapterItem()
        # Fallback avoids TypeError (None + '.txt') when the page has no <h1>.
        item['chapter_name'] = response.xpath('//h1/text()').get() or 'unknown'
        contents = response.xpath("//div[@id='content']/text()").getall()
        item['content'] = "\n".join(contents)
        item['filename'] = str(chapter_save) + '/' + item['chapter_name'] + '.txt'
        yield item